{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Example of AirLLM on macOS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sys import platform\n",
    "\n",
    "assert platform == \"darwin\", \"this example is supposed to be run on mac os\"\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Install AirLLM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting airllm\n",
      "  Using cached airllm-2.7-py3-none-any.whl.metadata (13 kB)\n",
      "Collecting tqdm (from airllm)\n",
      "  Using cached tqdm-4.66.1-py3-none-any.whl.metadata (57 kB)\n",
      "Requirement already satisfied: torch in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from airllm) (2.1.0)\n",
      "Collecting transformers (from airllm)\n",
      "  Using cached transformers-4.36.2-py3-none-any.whl.metadata (126 kB)\n",
      "Collecting accelerate (from airllm)\n",
      "  Downloading accelerate-0.25.0-py3-none-any.whl.metadata (18 kB)\n",
      "Collecting safetensors (from airllm)\n",
      "  Downloading safetensors-0.4.1-cp311-cp311-macosx_11_0_arm64.whl.metadata (3.8 kB)\n",
      "Collecting optimum (from airllm)\n",
      "  Downloading optimum-1.16.1-py3-none-any.whl.metadata (17 kB)\n",
      "Collecting huggingface-hub (from airllm)\n",
      "  Using cached huggingface_hub-0.20.1-py3-none-any.whl.metadata (12 kB)\n",
      "Collecting scipy (from airllm)\n",
      "  Downloading scipy-1.11.4-cp311-cp311-macosx_12_0_arm64.whl.metadata (165 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m165.4/165.4 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from accelerate->airllm) (1.26.2)\n",
      "Requirement already satisfied: packaging>=20.0 in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from accelerate->airllm) (23.2)\n",
      "Requirement already satisfied: psutil in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from accelerate->airllm) (5.9.0)\n",
      "Requirement already satisfied: pyyaml in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from accelerate->airllm) (6.0.1)\n",
      "Requirement already satisfied: filelock in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from torch->airllm) (3.13.1)\n",
      "Requirement already satisfied: typing-extensions in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from torch->airllm) (4.7.1)\n",
      "Requirement already satisfied: sympy in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from torch->airllm) (1.12)\n",
      "Requirement already satisfied: networkx in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from torch->airllm) (3.1)\n",
      "Requirement already satisfied: jinja2 in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from torch->airllm) (3.1.2)\n",
      "Requirement already satisfied: fsspec in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from torch->airllm) (2023.10.0)\n",
      "Requirement already satisfied: requests in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from huggingface-hub->airllm) (2.31.0)\n",
      "Collecting coloredlogs (from optimum->airllm)\n",
      "  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting datasets (from optimum->airllm)\n",
      "  Downloading datasets-2.16.0-py3-none-any.whl.metadata (20 kB)\n",
      "Collecting regex!=2019.12.17 (from transformers->airllm)\n",
      "  Downloading regex-2023.12.25-cp311-cp311-macosx_11_0_arm64.whl.metadata (40 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.9/40.9 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting tokenizers<0.19,>=0.14 (from transformers->airllm)\n",
      "  Downloading tokenizers-0.15.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (6.7 kB)\n",
      "Collecting sentencepiece!=0.1.92,>=0.1.91 (from transformers[sentencepiece]>=4.26.0->optimum->airllm)\n",
      "  Downloading sentencepiece-0.1.99-cp311-cp311-macosx_11_0_arm64.whl (1.2 MB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hCollecting protobuf (from transformers[sentencepiece]>=4.26.0->optimum->airllm)\n",
      "  Downloading protobuf-4.25.1-cp37-abi3-macosx_10_9_universal2.whl.metadata (541 bytes)\n",
      "Collecting humanfriendly>=9.1 (from coloredlogs->optimum->airllm)\n",
      "  Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting pyarrow>=8.0.0 (from datasets->optimum->airllm)\n",
      "  Downloading pyarrow-14.0.2-cp311-cp311-macosx_11_0_arm64.whl.metadata (3.0 kB)\n",
      "Collecting pyarrow-hotfix (from datasets->optimum->airllm)\n",
      "  Downloading pyarrow_hotfix-0.6-py3-none-any.whl.metadata (3.6 kB)\n",
      "Collecting dill<0.3.8,>=0.3.0 (from datasets->optimum->airllm)\n",
      "  Downloading dill-0.3.7-py3-none-any.whl.metadata (9.9 kB)\n",
      "Collecting pandas (from datasets->optimum->airllm)\n",
      "  Downloading pandas-2.1.4-cp311-cp311-macosx_11_0_arm64.whl.metadata (18 kB)\n",
      "Collecting xxhash (from datasets->optimum->airllm)\n",
      "  Downloading xxhash-3.4.1-cp311-cp311-macosx_11_0_arm64.whl.metadata (12 kB)\n",
      "Collecting multiprocess (from datasets->optimum->airllm)\n",
      "  Downloading multiprocess-0.70.15-py311-none-any.whl.metadata (7.2 kB)\n",
      "Collecting aiohttp (from datasets->optimum->airllm)\n",
      "  Downloading aiohttp-3.9.1-cp311-cp311-macosx_11_0_arm64.whl.metadata (7.4 kB)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from requests->huggingface-hub->airllm) (3.3.2)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from requests->huggingface-hub->airllm) (3.6)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from requests->huggingface-hub->airllm) (2.1.0)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from requests->huggingface-hub->airllm) (2023.11.17)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from jinja2->torch->airllm) (2.1.1)\n",
      "Requirement already satisfied: mpmath>=0.19 in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from sympy->torch->airllm) (1.3.0)\n",
      "Requirement already satisfied: attrs>=17.3.0 in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from aiohttp->datasets->optimum->airllm) (23.1.0)\n",
      "Collecting multidict<7.0,>=4.5 (from aiohttp->datasets->optimum->airllm)\n",
      "  Downloading multidict-6.0.4-cp311-cp311-macosx_11_0_arm64.whl (29 kB)\n",
      "Collecting yarl<2.0,>=1.0 (from aiohttp->datasets->optimum->airllm)\n",
      "  Downloading yarl-1.9.4-cp311-cp311-macosx_11_0_arm64.whl.metadata (31 kB)\n",
      "Collecting frozenlist>=1.1.1 (from aiohttp->datasets->optimum->airllm)\n",
      "  Downloading frozenlist-1.4.1-cp311-cp311-macosx_11_0_arm64.whl.metadata (12 kB)\n",
      "Collecting aiosignal>=1.1.2 (from aiohttp->datasets->optimum->airllm)\n",
      "  Downloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n",
      "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from pandas->datasets->optimum->airllm) (2.8.2)\n",
      "Requirement already satisfied: pytz>=2020.1 in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from pandas->datasets->optimum->airllm) (2023.3.post1)\n",
      "Collecting tzdata>=2022.1 (from pandas->datasets->optimum->airllm)\n",
      "  Using cached tzdata-2023.3-py2.py3-none-any.whl (341 kB)\n",
      "Requirement already satisfied: six>=1.5 in /usr/local/anaconda3/envs/native/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas->datasets->optimum->airllm) (1.16.0)\n",
      "Using cached airllm-2.7-py3-none-any.whl (34 kB)\n",
      "Downloading accelerate-0.25.0-py3-none-any.whl (265 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m265.7/265.7 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading safetensors-0.4.1-cp311-cp311-macosx_11_0_arm64.whl (426 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m426.3/426.3 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hUsing cached huggingface_hub-0.20.1-py3-none-any.whl (330 kB)\n",
      "Using cached tqdm-4.66.1-py3-none-any.whl (78 kB)\n",
      "Downloading optimum-1.16.1-py3-none-any.whl (403 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m403.3/403.3 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hUsing cached transformers-4.36.2-py3-none-any.whl (8.2 MB)\n",
      "Downloading scipy-1.11.4-cp311-cp311-macosx_12_0_arm64.whl (29.7 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m29.7/29.7 MB\u001b[0m \u001b[31m13.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hDownloading regex-2023.12.25-cp311-cp311-macosx_11_0_arm64.whl (291 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m291.0/291.0 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading tokenizers-0.15.0-cp311-cp311-macosx_11_0_arm64.whl (2.5 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.5/2.5 MB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hDownloading datasets-2.16.0-py3-none-any.whl (507 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.1/507.1 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hDownloading dill-0.3.7-py3-none-any.whl (115 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading aiohttp-3.9.1-cp311-cp311-macosx_11_0_arm64.whl (386 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m386.8/386.8 kB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading pyarrow-14.0.2-cp311-cp311-macosx_11_0_arm64.whl (24.0 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.0/24.0 MB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hDownloading multiprocess-0.70.15-py311-none-any.whl (135 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m135.4/135.4 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading pandas-2.1.4-cp311-cp311-macosx_11_0_arm64.whl (10.8 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.8/10.8 MB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hDownloading protobuf-4.25.1-cp37-abi3-macosx_10_9_universal2.whl (394 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m394.2/394.2 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hDownloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)\n",
      "Downloading xxhash-3.4.1-cp311-cp311-macosx_11_0_arm64.whl (30 kB)\n",
      "Downloading frozenlist-1.4.1-cp311-cp311-macosx_11_0_arm64.whl (53 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.4/53.4 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading yarl-1.9.4-cp311-cp311-macosx_11_0_arm64.whl (81 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m81.2/81.2 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hInstalling collected packages: sentencepiece, xxhash, tzdata, tqdm, scipy, safetensors, regex, pyarrow-hotfix, pyarrow, protobuf, multidict, humanfriendly, frozenlist, dill, yarl, pandas, multiprocess, huggingface-hub, coloredlogs, aiosignal, tokenizers, aiohttp, accelerate, transformers, datasets, optimum, airllm\n",
      "Successfully installed accelerate-0.25.0 aiohttp-3.9.1 aiosignal-1.3.1 airllm-2.7 coloredlogs-15.0.1 datasets-2.16.0 dill-0.3.7 frozenlist-1.4.1 huggingface-hub-0.20.1 humanfriendly-10.0 multidict-6.0.4 multiprocess-0.70.15 optimum-1.16.1 pandas-2.1.4 protobuf-4.25.1 pyarrow-14.0.2 pyarrow-hotfix-0.6 regex-2023.12.25 safetensors-0.4.1 scipy-1.11.4 sentencepiece-0.1.99 tokenizers-0.15.0 tqdm-4.66.1 transformers-4.36.2 tzdata-2023.3 xxhash-3.4.1 yarl-1.9.4\n"
     ]
    }
   ],
   "source": [
    "!pip install -U  airllm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
     "# For local development: overwrite the installed airllm package with the local working copy\n",
    "#!cp -r /Users/l_y_o/Work/Anima/air_llm/airllm/* /usr/local/anaconda3/envs/native/lib/python3.11/site-packages/airllm/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test AirLLM with MLX"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from airllm import AutoModel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bc02b05b26854198b6bd124287f74bf7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "945203eb3eb34d48aa80fbf9bff634dd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Fetching 18 files:   0%|          | 0/18 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "667380fd90e74fde8e17a69d7f97a012",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer.json:   0%|          | 0.00/3.56M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f193778a98354093993a4fea300c5f82",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer_config.json:   0%|          | 0.00/320 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "91ed61f306cb4b84a10d32f92235800a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00003-of-00007.safetensors:   0%|          | 0.00/9.75G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "601451d9a6484f9795ef93ea1d07ccc7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer.model:   0%|          | 0.00/1.03M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|                                                                                                                                                                                               | 0/63 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading shard 1/7\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  2%|██▉                                                                                                                                                                                    | 1/63 [00:01<02:00,  1.94s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.embed_tokens.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  3%|█████▊                                                                                                                                                                                 | 2/63 [00:04<02:04,  2.04s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.0.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  5%|████████▋                                                                                                                                                                              | 3/63 [00:06<02:05,  2.09s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.1.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  6%|███████████▌                                                                                                                                                                           | 4/63 [00:08<02:01,  2.06s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.2.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  8%|██████████████▌                                                                                                                                                                        | 5/63 [00:10<01:55,  2.00s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.3.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 10%|█████████████████▍                                                                                                                                                                     | 6/63 [00:12<02:01,  2.13s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.4.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 11%|████████████████████▎                                                                                                                                                                  | 7/63 [00:15<02:08,  2.30s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.5.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 13%|███████████████████████▏                                                                                                                                                               | 8/63 [00:17<02:10,  2.38s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.6.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 14%|██████████████████████████▏                                                                                                                                                            | 9/63 [00:20<02:12,  2.45s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.7.mlx\n",
      "Loading shard 2/7\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 16%|████████████████████████████▉                                                                                                                                                         | 10/63 [00:22<02:05,  2.37s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.8.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 17%|███████████████████████████████▊                                                                                                                                                      | 11/63 [00:24<02:02,  2.35s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.9.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 19%|██████████████████████████████████▋                                                                                                                                                   | 12/63 [00:27<01:59,  2.35s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.10.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 21%|█████████████████████████████████████▌                                                                                                                                                | 13/63 [00:29<01:55,  2.32s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.11.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 22%|████████████████████████████████████████▍                                                                                                                                             | 14/63 [00:31<01:51,  2.28s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.12.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 24%|███████████████████████████████████████████▎                                                                                                                                          | 15/63 [00:34<01:53,  2.37s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.13.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 25%|██████████████████████████████████████████████▏                                                                                                                                       | 16/63 [00:36<01:52,  2.40s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.14.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 27%|█████████████████████████████████████████████████                                                                                                                                     | 17/63 [00:39<01:50,  2.41s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.15.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 29%|████████████████████████████████████████████████████                                                                                                                                  | 18/63 [00:41<01:51,  2.47s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.16.mlx\n",
      "Loading shard 3/7\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 30%|██████████████████████████████████████████████████████▉                                                                                                                               | 19/63 [00:43<01:46,  2.42s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.17.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 32%|█████████████████████████████████████████████████████████▊                                                                                                                            | 20/63 [00:46<01:41,  2.37s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.18.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 33%|████████████████████████████████████████████████████████████▋                                                                                                                         | 21/63 [00:48<01:35,  2.27s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.19.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 35%|███████████████████████████████████████████████████████████████▌                                                                                                                      | 22/63 [00:50<01:33,  2.29s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.20.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 37%|██████████████████████████████████████████████████████████████████▍                                                                                                                   | 23/63 [00:52<01:29,  2.25s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.21.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 38%|█████████████████████████████████████████████████████████████████████▎                                                                                                                | 24/63 [00:54<01:25,  2.20s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.22.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 40%|████████████████████████████████████████████████████████████████████████▏                                                                                                             | 25/63 [00:57<01:24,  2.21s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.23.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 41%|███████████████████████████████████████████████████████████████████████████                                                                                                           | 26/63 [00:59<01:20,  2.18s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.24.mlx\n",
      "Loading shard 4/7\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 43%|██████████████████████████████████████████████████████████████████████████████                                                                                                        | 27/63 [01:01<01:22,  2.28s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.25.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 44%|████████████████████████████████████████████████████████████████████████████████▉                                                                                                     | 28/63 [01:04<01:20,  2.29s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.26.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 46%|███████████████████████████████████████████████████████████████████████████████████▊                                                                                                  | 29/63 [01:06<01:15,  2.23s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.27.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 48%|██████████████████████████████████████████████████████████████████████████████████████▋                                                                                               | 30/63 [01:08<01:14,  2.26s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.28.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 49%|█████████████████████████████████████████████████████████████████████████████████████████▌                                                                                            | 31/63 [01:10<01:11,  2.25s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.29.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 51%|████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                         | 32/63 [01:12<01:08,  2.21s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.30.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 52%|███████████████████████████████████████████████████████████████████████████████████████████████▎                                                                                      | 33/63 [01:15<01:07,  2.24s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.31.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 54%|██████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                                   | 34/63 [01:17<01:08,  2.37s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.32.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 56%|█████████████████████████████████████████████████████████████████████████████████████████████████████                                                                                 | 35/63 [01:20<01:09,  2.48s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.33.mlx\n",
      "Loading shard 5/7\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 57%|████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                              | 36/63 [01:22<01:03,  2.35s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.34.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 59%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                           | 37/63 [01:24<00:59,  2.28s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.35.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 60%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                        | 38/63 [01:27<00:57,  2.31s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.36.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 62%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                     | 39/63 [01:29<00:56,  2.35s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.37.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 63%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                  | 40/63 [01:32<00:55,  2.42s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.38.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 65%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                               | 41/63 [01:34<00:53,  2.45s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.39.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 67%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                            | 42/63 [01:36<00:50,  2.38s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.40.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 68%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                         | 43/63 [01:39<00:48,  2.41s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.41.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 70%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                       | 44/63 [01:41<00:46,  2.45s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.42.mlx\n",
      "Loading shard 6/7\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 71%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                    | 45/63 [01:44<00:44,  2.48s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.43.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 73%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                 | 46/63 [01:46<00:41,  2.46s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.44.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 75%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                              | 47/63 [01:49<00:39,  2.47s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.45.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 76%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                           | 48/63 [01:51<00:35,  2.37s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.46.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 78%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                        | 49/63 [01:53<00:32,  2.35s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.47.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 79%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                     | 50/63 [01:56<00:30,  2.38s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.48.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 81%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                  | 51/63 [01:58<00:29,  2.42s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.49.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 83%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                               | 52/63 [02:01<00:27,  2.51s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.50.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                             | 53/63 [02:04<00:25,  2.56s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.51.mlx\n",
      "Loading shard 7/7\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                          | 54/63 [02:06<00:22,  2.50s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.52.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 87%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                       | 55/63 [02:08<00:19,  2.43s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.53.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 89%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                    | 56/63 [02:11<00:17,  2.44s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.54.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                 | 57/63 [02:13<00:15,  2.51s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.55.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌              | 58/63 [02:16<00:13,  2.61s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.56.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 94%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍           | 59/63 [02:19<00:10,  2.72s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.57.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 95%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎        | 60/63 [02:23<00:09,  3.17s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.58.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏     | 61/63 [02:41<00:15,  7.60s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.layers.59.mlx\n",
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/model.norm.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [02:45<00:00,  2.62s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--01-ai--Yi-34B/snapshots/9292541b776cae9f25cf40e14764dcffc12c8999/splitted_model/lm_head.mlx\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "model = AutoModel.from_pretrained(\"01-ai/Yi-34B\")#\"garage-bAInd/Platypus2-7B\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'input_ids': array([[59597,   947]])}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "input_text = [\n",
    "        #'What is the capital of United States?',\n",
    "        'I like',\n",
    "    ]\n",
    "\n",
    "MAX_LENGTH = 128\n",
    "input_tokens = model.tokenizer(input_text,\n",
    "    return_tensors=\"np\", \n",
    "    return_attention_mask=False, \n",
    "    truncation=True, \n",
    "    max_length=MAX_LENGTH, \n",
    "    padding=False)\n",
    "\n",
    "input_tokens"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 60/60 [00:53<00:00,  1.12it/s]\n",
      "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 60/60 [00:55<00:00,  1.09it/s]\n",
      "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 60/60 [00:53<00:00,  1.12it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " to think that\n"
     ]
    }
   ],
   "source": [
    "\n",
    "import mlx.core as mx\n",
    "generation_output = model.generate(\n",
    "    mx.array(input_tokens['input_ids']), \n",
    "    max_new_tokens=3,\n",
    "    use_cache=True,\n",
    "    return_dict_in_generate=True)\n",
    "\n",
    "print(generation_output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# test Platypus2 7b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from airllm import AutoModel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9abc1702b4c34ed69aba9442d745cc29",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Fetching 0 files: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "92138b9c855b41c4a91eb92dee9404bf",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Fetching 12 files:   0%|          | 0/12 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saved layers already found in /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model\n"
     ]
    }
   ],
   "source": [
    "model = AutoModel.from_pretrained(\"garage-bAInd/Platypus2-7B\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'input_ids': array([[  1, 306, 763]])}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "input_text = [\n",
    "        #'What is the capital of United States?',\n",
    "        'I like',\n",
    "    ]\n",
    "\n",
    "MAX_LENGTH = 128\n",
    "input_tokens = model.tokenizer(input_text,\n",
    "    return_tensors=\"np\", \n",
    "    return_attention_mask=False, \n",
    "    truncation=True, \n",
    "    max_length=MAX_LENGTH, \n",
    "    padding=False)\n",
    "\n",
    "input_tokens"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "running layers:: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:08<00:00,  3.95it/s]\n",
      "running layers:: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:08<00:00,  3.66it/s]\n",
      "running layers:: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:07<00:00,  4.06it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "to think of\n"
     ]
    }
   ],
   "source": [
    "\n",
    "import mlx.core as mx\n",
    "generation_output = model.generate(\n",
    "    mx.array(input_tokens['input_ids']), \n",
    "    max_new_tokens=3,\n",
    "    use_cache=True,\n",
    "    return_dict_in_generate=True)\n",
    "\n",
    "print(generation_output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
